#libraries used for the analysis and set up the workplace

library(data.table)
library(ggplot2)
library(tidyverse)
library(survival)
library(ggpubr)
library(survminer)
library(survival)
library(bdsmatrix)
library(coxme)
library(MuMIn)
library(car)
library(tufte)
library(ggthemes)
library(plyr)
library(ggpubr)
library(rstatix)
library(FSA)

setwd("C:/Users/abdallaa/OneDrive - IAEA/My_passport_6/Dera_2023/Spiroplasma_MS_V2/revised/Vesrion_4/Spiro_Ms_20240815/Submission/Submission")

## Evaluation of the dynamic of Spiroplasma in Gff colony (Supplementary figure 4)

data2 <- read.csv("data2.csv")
str(data2)
data2$Expression=as.numeric(data2$Expression)
rep1=na.omit(data2)
attach(data2)
#plot of the Normalized density of Spiroplasma according to the age
figS4.tiff<-ggplot(data2,aes(x=Age ,y=Expression)) +  geom_boxplot() + stat_summary(fun= mean, colour="darkred", geom = "point", shape=18, size=3, show.legend = FALSE) + 
  geom_jitter(width=0.1,alpha=0.2) + ylim(0, 4.5) + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust=1)) + scale_x_discrete(limits = c("Larvae","Pupae_Day1", "Pupae_Day10", "Pupae_Day20", "Adult_Day1","Adult_Day2","Adult_Day3", "Adult_Day5","Adult_Day7", "Adult_Day14", "Adult_Day21", "Adult_Day30" ))

figS4.tiff
tiff("figS4.tiff", width = 12, height = 4, units = 'in', res = 300)
plot(figS4.tiff + theme_tufte() + 
       theme(axis.line = element_line(size = 1, colour = "black"))) + 
  xlab(expression(bolditalic("Age of the flies"))) + 
  ylab( expression (paste (bold("Normalized density of "), bolditalic(" Spiroplasma"), )))
dev.off()
shapiro.test(data2$Expression) #<0.05 so not normally distributed

kruskal.test(Expression~Age, data = data2)
tmp <- dunnTest(Expression~Age, data = data2, method ="none")
tmp

## Evaluation of the impact of Spiroplasma on the length of the pupal period,  emergence rate, fly propensity and damaged wings
data_qc <- read.csv("QC_test.csv")
head(data_qc)
str(data_qc)
attach(data_qc)

#test of normality: shapiro test, if p>0.05 means the data is normalized distributed and we can use t.test, otherwise use wilcoxon 
shapiro.test(data_qc$Age) #p<0.05 so the data are not normal, use wilcoxon test
shapiro.test(data_qc$Emergence) #p<005 so the data are not normal, use wilcoxon test
shapiro.test(data_qc$Flyers) #p>0.05 so the data are normal, use t.test
shapiro.test(data_qc$Non.flyers) #p>0.05 so the data are normal, use t.test
shapiro.test(data_qc$Damage_wings) #p<0.05 so the data are not normal, use wilcoxon test

#Length of the pupal period (figure 2A)
fig2a.tiff <- ggplot(data_qc,aes(x=Status ,y=Age, fill = Status)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") +
  theme_bw() + labs( x = "Infection status", y = "Pupal period (days)") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(10, 35) + scale_fill_manual(values=c("skyblue", "coral1")) + stat_compare_means(method = "wilcox.test") + 
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
fig2a.tiff
tiff("fig2a.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(fig2a.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Infection status"))) + ylab( expression (paste (bold("Pupal period (Days)"), )))
dev.off()
wilcox.test(Age~Status, data=data_qc)

#Emergence rate (figure 2B)
fig2b.tiff<- ggplot(data_qc,aes(x=Status ,y=Emergence, fill = Status)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") + # Add points to plot
  theme_bw() + labs( x = "Infection status", y = "Emergence rate (%)") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(60, 110) + scale_fill_manual(values=c("skyblue", "coral1")) + stat_compare_means(method = "wilcox.test") + 
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
fig2b.tiff
tiff("fig2b.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(fig2b.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Infection status"))) + ylab( expression (paste (bold("Emergence rate (%)"), )))
dev.off()
wilcox.test(Emergence~Status, data=data_qc)

#Fly propensity (figure 2C)
fig2c.tiff<- ggplot(data_qc,aes(x=Status ,y=Flyers, fill = Status)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") + # Add points to plot
  theme_bw() + labs( x = "Infection status", y = "Fly propensity (%)") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(60, 110) + scale_fill_manual(values=c("skyblue", "coral1")) + stat_compare_means(method = "t.test") + 
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
fig2c.tiff
tiff("fig2c.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(fig2c.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Infection status"))) + ylab( expression (paste (bold("Fly propensity (%)"), )))
dev.off()
t.test(Flyers~Status, data=data_qc)

#Damaged wings (Supplementary figure 5) 
figS5.tiff <- ggplot(data_qc,aes(x=Status ,y=Damage_wings, fill = Status)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") + # Add points to plot
  theme_bw() + labs( x = "Infection status", y = "Percentage of damaged wings (%)") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(0, 5) + scale_fill_manual(values=c("skyblue", "coral1")) + stat_compare_means(method = "wilcox.test") + 
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
figS5.tiff
tiff("figS5.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(figS5.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Infection status"))) + ylab( expression (paste (bold("Percentage of damaged wings (%)"), )))
dev.off()
wilcox.test(Damage_wings~Status, data=data_qc)



## Evaluation of the impact of Spiroplasma on the productivity (pf10d) (Figure 3) and spermatheca fill (Supplementary figure 6A and 6B)
data_prod <- fread("productivity.csv")
head(data_prod)
str(data_prod)
attach(data_prod)
data_prod <- transform(data_prod,
                       replicate=factor(replicate),
                       status=factor(status))
data_prod$pf10dm=as.numeric(data_prod$pf10dm)
data_prod$Stheca_1=as.numeric(data_prod$Stheca_1)
data_prod$Stheca_2=as.numeric(data_prod$Stheca_2)

###test of normality : shapiro test, if p>0.05 means the data is normalized distributed and we can use t.test, otherwise use wilcoxon 
shapiro.test(data_prod$pf10dm)  #p<0.05 not normally distributed so use wilcoxon test
shapiro.test(data_prod$age_first_pupae) #p<0.05 not normally distributed so use wilcoxon test

### plot for the pf10d (figure 3)
fig3.tiff <- ggplot(data_prod,aes(x=status ,y=pf10dm, fill = status)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") + # Add points to plot
  theme_bw() + labs( x = "Infection status", y = "Pupae per female per 10 days") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(0, 1.2) + scale_fill_manual(values=c("skyblue", "coral1")) + stat_compare_means(method = "wilcox.test") + 
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
fig3.tiff
tiff("fig3.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(fig3.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Infection status"))) + ylab( expression (paste (bold("Pupae per female per 10 days"), )))
dev.off()

model1<-glm(pf10dm ~ status, data = data_prod)
summary(model1)
anova(model1)
Anova(model1)

# Impact of Spiroplasma on the Mean of Spermatheca fill (supplementary figure 6A)
figS6A.tiff <- ggplot(data_prod,aes(x=status ,y=Stheca_1, fill = status)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") + # Add points to plot
  labs( x = "Infection status", y = "Spermatheca fill") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(0, 1.2) + scale_fill_manual(values=c("skyblue", "coral1")) + stat_compare_means(method = "wilcox.test") + theme_bw() +
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
figS6A.tiff
tiff("figS6A.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(figS6A.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Infection status"))) + ylab( expression (paste (bold("Spermatheca fill"), )))
dev.off()

### Impact of Spiroplasma on the Frequencies of Spermatheca fill score (supplementary figure 6B)
data <- read.csv("dissection.csv")
data <- transform(data, replicate=factor(replicate),
                  status=factor(status),
                  spermatheca_score=factor(spermatheca_score))

# creation of the mean and the sd
data2 <- ddply(data, c("status", "spermatheca_score"), summarise,
               N    = length(frequencies),
               mean = mean(frequencies),
               sd   = sd(frequencies),
               se   = sd / sqrt(N)
)

# choiche of the test to use
shapiro.test(data2$mean) ##p<0.05 so not normal, wilcoxon test

#plot the boxplot with the error bar
str(data2)
figS6B.tiff <- ggplot(data2, aes(x=status, y=mean, fill=spermatheca_score)) + 
  geom_bar(stat="identity", color="black", 
           position=position_dodge()) +
  geom_errorbar(aes(ymin=mean, ymax=mean+sd), width=.2,
                position=position_dodge(.9)) +
  xlab("Degree of Spermatheca fill according to the fly infection status") + 
  ylab("Frequencies (%)") +   scale_y_continuous(breaks=0:100*10) +
  theme_bw() + theme_classic() 
figS6B.tiff 
tiff("figS6B.tiff", width = 9, height = 4, units = 'in', res = 300)
plot(figS6B.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Infection status"))) + ylab( expression (paste (bold("Frequencies of the score of spermatheca fill (%)"), )))
dev.off()

## Evaluation of the impact of Spiroplasma on the pupal size (figure 4) 
size <- read.csv("pupae_size.csv")
str(size)
size$replicate=as.factor(size$replicate)
size$status=as.factor(size$status)
size$cycle=as.factor(size$cycle)
### creation of the mean and the sd
size2 <- ddply(size, c("status", "categogie"), summarise,
               N    = length(frequencies),
               mean = mean(frequencies),
               sd   = sd(frequencies),
               se   = sd / sqrt(N)
)

size2
size2$status=as.factor(size2$status)
str(size2)

## Choice of the test to use
shapiro.test(size2$mean) ## p>0.05 Noramalized data, so t-test
##plot the boxplot with the error bar (figure 7)
fig4.tiff <- ggplot(size2, aes(x=categogie, y=mean, fill=status)) + 
  geom_bar(stat="identity", color="black", 
           position=position_dodge()) +
  geom_errorbar(aes(ymin=mean, ymax=mean+sd), width=.2,
                position=position_dodge(.9)) +
  xlab("Size of the pupae according to the fly infection status") + 
  ylab("Frequencies (%)") +   scale_y_continuous(breaks=0:100*10) +
  theme_bw() + theme_classic() + stat_compare_means(method = "t.test") + 
  scale_fill_manual(values=c("skyblue", "coral1"))
fig4.tiff
tiff("fig4.tiff", width = 5, height = 4, units = 'in', res = 300)
plot(fig4.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Size of the pupae according to the fly infection status"))) + ylab( expression (paste (bold("Frequencies (%)"), )))
dev.off()

model1<-glm(mean ~ categogie+status, data = size2)
summary(model1)
anova(model1)
Anova(model1)

#subset of the data to check the significant differences between the categories
size_A <- subset(size,categogie=="A")
model1<-glm(frequencies ~ status, data = size_A)
summary(model1)
anova(model1)
Anova(model1)

size_B <- subset(size,categogie=="B")
model1<-glm(frequencies ~ status, data = size_B)
summary(model1)
anova(model1)
Anova(model1)

size_C <- subset(size,categogie=="C")
model1<-glm(frequencies ~ status, data = size_C)
summary(model1)
anova(model1)
Anova(model1)

size_D <- subset(size,categogie=="D")
model1<-glm(frequencies ~ status, data = size_D)
summary(model1)
anova(model1)
Anova(model1)

size_E <- subset(size,categogie=="E")
model1<-glm(frequencies ~ status, data = size_E)
summary(model1)
anova(model1)
Anova(model1)


## Evaluation of the impact of Spiroplasma on the Survival under starvation (figure 5A)

data_surv <- read.csv("spi_survival.csv")
head(data_surv)

data_surv$Replicate=as.factor(data_surv$Replicate)
data_surv$Treatment=as.factor(data_surv$Treatment)
data_surv$Sex=as.factor(data_surv$Sex)
data_surv$Infection=as.factor(data_surv$Infection)

str(data_surv)
summary(data_surv)
# checking for the best model
mod1<- coxme(Surv(Day, Status)~Treatment + ( 1|Replicate), data = data_surv)
mod2<- coxme(Surv(Day, Status)~Infection + Sex + ( 1|Replicate), data = data_surv)
mod3<- coxme(Surv(Day, Status)~Infection + ( 1|Replicate), data = data_surv)
mod4<- coxme(Surv(Day, Status)~Sex + ( 1|Replicate), data = data_surv)

AICc(mod1,mod2, mod3, mod4) #models 1 and 2 explained well the difference

summary(mod1)
summary (mod2)
summary (mod3)
summary (mod4)

Anova(mod1)
Anova(mod2)
Anova(mod3)
Anova(mod4)

#Graph of the survival (figure 5A)
km <- with(data_surv, Surv(Day, Status))
head(km,49)
km_trt_fit <- survfit(Surv(Day, Status) ~ Treatment, data=data_surv)
plot(survfit(Surv(Day,Status)~Treatment,data=data_surv), xlab = "Day", ylab = "Survival rate", col=c('black','red','blue','green'), lwd=2, xlim =c(0, 12)) 
legend('bottomleft', text.font = , cex=0.8, c("Spi-_Females","Spi-_Males", "Spi+_Females",  "Spi+_Males"), col=c('black','red','blue', 'green'), lty = 1, lwd=2, box.lty = 1)

## Evaluation of the impact of Spiroplasma on the survival under feeding condition (figure 5B) 

data_surv_fed <- read.csv("survival_feeding.csv")

data_surv_fed$Replicate=as.factor(data_surv_fed$Replicate)
data_surv_fed$Treatment=as.factor(data_surv_fed$Treatment)
data_surv_fed$Sex=as.factor(data_surv_fed$Sex)
data_surv_fed$Infection=as.factor(data_surv_fed$Infection)
data_surv_fed$Day=as.numeric(data_surv_fed$Day)
data_surv_fed=na.omit(data_surv_fed)
#Plot of the survival (figure 5B)
str(data_surv_fed)
km <- with(data_surv_fed, Surv(Day, Status))
head(km,49)
km_trt_fit <- survfit(Surv(Day, Status) ~ Treatment, data=data_surv_fed)
plot(survfit(Surv(Day,Status)~Treatment,data=data_surv_fed), xlab = "Day", ylab = "Survival rate", col=c('black','red','blue','green'), lwd=2, xlim =c(0, 60)) 
legend('bottomleft', text.font = , cex=0.8, c("Spi-_Females","Spi-_Males", "Spi+_Females",  "Spi+_Males"), col=c('black','red','blue', 'green'), lty = 1, lwd=2, box.lty = 1)

###Analysis of the significant difference according to the status and the sex
##survival of the males mated only
data_surv2 <- read.csv("survival males single mating.csv")
head(data_surv2)

data_surv2$Replicate=as.factor(data_surv2$Replicate)
data_surv2$Treatment=as.factor(data_surv2$Treatment)
data_surv2$Sex=as.factor(data_surv2$Sex)
data_surv2$Infection=as.factor(data_surv2$Infection)
data_surv2$Day=as.numeric(data_surv2$Day)

str(data_surv2)
summary(data_surv2)

library(coxme)
mod1<- coxme(Surv(Day, Status)~Treatment + ( 1|Replicate), data = data_surv2)
summary(mod1)
anova(mod1)

data_surv2$Treatment <- factor(trimws(data_surv2$Treatment),
                               levels = c("Spi-_Males", "Spi+_Males"))

##survival of the females mated only
data_surv3 <- read.csv("survival_females_single_mating.csv")

head(data_surv3)
data_surv3$Replicate=as.factor(data_surv3$Replicate)
data_surv3$Treatment=as.factor(data_surv3$Treatment)
data_surv3$Sex=as.factor(data_surv3$Sex)
data_surv3$Infection=as.factor(data_surv3$Infection)
data_surv3$Days=as.numeric(data_surv3$Days)

str(data_surv3)
summary(data_surv3)

library(coxme)
mod1<- coxme(Surv(Days, Status)~Treatment + ( 1|Replicate), data = data_surv3)
summary(mod1)
anova(mod1)

## Evaluation of the impact of Spiroplasma on the relative mating index (figure 6A)
data_fc <- read.csv("RMI.csv")
str(data_fc)
data_fc <- transform(data_fc, Female_Mated=factor(Female_Mated),
                     Status=factor(Status)) 
model1<-lm(RMI ~ Female_Mated*Status + ( 1|replicate), data = data_fc)
summary(model1)
anova(model1)
Anova(model1)

#plot of the RMI (figure 6A)
shapiro.test(data_fc$RMI) #p>0.05,  normally distributed, so use t.test
fig6A.tiff <- ggplot(data_fc,aes(x=Status ,y=RMI, fill = Status)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") + # Add points to plot
  theme_bw() + labs( x = "Infection status", y = "Relative Mating Index (%)") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(0, 70) + scale_fill_manual(values=c("skyblue", "coral1")) + stat_compare_means(method = "t.test") + 
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
fig6A.tiff
tiff("fig6A.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(fig6A.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Males' infection status"))) + ylab( expression (paste (bold("Relative Mating Index (%)"), )))
dev.off()


## Evaluation of the impact of Spiroplasma on the mating latency (supplementary figure 7)

library(readxl)
data <- read_excel("field cage.xlsx")
str(data)
data$replicate = as.factor(data$replicate)
data$treatment = as.factor(data$treatment)
data$females = as.factor(data$females)

str(data)
summary(data)
attach(data)

###Modelling##
library(rcompanion)
plotNormalHistogram(latency)

hist(latency,freq=FALSE, col="grey")                             
lines(density(latency, na.rm=TRUE), col="red")  
library(car)
qqPlot(latency)
shapiro.test(latency) #data are not normally distributed, the Shapiro test also saw p value<0.05

library(lme4)
mod0<-glm(latency~treatment*females, family = "gaussian")

#Test of fit
overdisp_fun <- function(mod0) {
  rdf <- df.residual(mod0)
  rp <- residuals(mod0,type="pearson")
  Pearson.chisq <- sum(rp^2)
  prat <- Pearson.chisq/rdf
  pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
  c(chisq=Pearson.chisq, rdf=rdf, ratio=prat ,p.val=pval)
}
overdisp_fun(mod0) ##  overdispersed p<0.001, transformation is needed for Gaussian family application

#Tukey transformation
T_tuk = transformTukey(latency, plotit = FALSE)# lambda=0.25 to use for transformation
plotNormalHistogram(T_tuk)# looks better normality distributed

#----------------------model selection--------------##
y<-(latency^0.25)
mod <- glm(y~treatment*females, family = "gaussian")

library(MuMIn)
AICc(mod)#mod is the best model
summary(mod)

library(car)
Anova(mod)

#The mating latency did not differ Significantly between

# treatment           X2 = 0.0047  df = 1     p = 0.94526
# females             X2 = 0.1861  df = 1     p = 0.66620
# treatment:females   X2 = 3.2745  df = 1     p = 0.07037 .

#Pairwise comparison is not needed but we can still do it

#multiple comparison
library(emmeans)
summary(emmeans(mod, pairwise~ treatment*females), type="response")

#plot of the mating latency according to the infection status (supplementary figure 5)
figS7.tiff <- ggplot(data,aes(x=treatment ,y=latency, fill = treatment)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") +
  theme_bw() + labs( x = "Males' infection status", y = "Mating latency (Min)") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(0, 200) + scale_fill_manual(values=c("skyblue", "coral1")) + 
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
figS7.tiff
tiff("figS7.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(figS7.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Males infection status"))) + ylab( expression (paste (bold("Mating latency (Min)"), )))
dev.off()


## Evaluation of the impact of Spiroplasma on the mating duration (figure 6B) 

#Modelling the mating duration##
library(rcompanion)
plotNormalHistogram(duration)

hist(duration,freq=FALSE, col="grey")                             
lines(density(duration, na.rm=TRUE), col="red")  
library(car)
qqPlot(duration)
shapiro.test(duration)

#data are normally distributed, the Shapiro test also saw p value>0.05, data do not need to be transformed
library(lme4)

mod0<-glm(duration~treatment*females, data=data, family="gaussian")
mod0

##Shapiro test on the model residuals # residuals are not normally distributed
library(car)

overdisp_fun <- function(mod0) {
  rdf <- df.residual(mod0)
  rp <- residuals(mod0,type="pearson")
  Pearson.chisq <- sum(rp^2)
  prat <- Pearson.chisq/rdf
  pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
  c(chisq=Pearson.chisq, rdf=rdf, ratio=prat ,p.val=pval)
}
overdisp_fun(mod0) ##  overdispersed p<0.001, transformation is needed for Gaussian familly application

#Tukey transformation
T_tuk = transformTukey(duration, plotit = FALSE)# lambda=0.25 to use for transformation
plotNormalHistogram(T_tuk)# looks better normality distributed

#----------------------model selection--------------##
y<-(duration^0.7)
mod <- glm(y~treatment*females, family = "gaussian")

library(MuMIn)
AICc(mod)
summary(mod)

library(car)
Anova(mod)

#The mating duration varied Significantly between females status and 
#the interaction between females and males status

#Response:          X2      df  pvalue  

#treatment           0.0000  1    0.99754    
#females             5.4162  1    0.01995 *  
#treatment:females  15.2850  1  9.245e-05 ***

#Pairwise comparison is needed

#multiple comparison
library(emmeans)
summary(emmeans(mod, pairwise~ treatment*females), type="response")

#Plot of the mating duration according to the female status (figure 6B)

fig6B.tiff <- ggplot(data,aes(x=females ,y=duration, fill = females)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") +
  theme_bw() + labs( x = "Males' infection status", y = "Mating duration (Min)") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(0, 170) + scale_fill_manual(values=c("skyblue", "coral1")) + stat_compare_means(method = "wilcox.test") + 
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
fig6B.tiff
tiff("fig6B.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(fig6B.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Females' infection status"))) + ylab( expression (paste (bold("Mating duration (Min)"), )))
dev.off()

## Evaluation of the impact of Spiroplasma on the Spermatheca fill after competitve mating test analysis (figure 6C)
data <- transform(data,
                  replicate=factor(replicate),
                  treatment=factor(treatment), 
                  spermfill=as.numeric(spermfill))
str(data)
summary(data)
attach(data)
library(rcompanion)
plotNormalHistogram(duration)

##About data normality##
hist(spermfill,freq=FALSE, col="grey")                             
lines(density(spermfill, na.rm=TRUE), col="red")  
library(car)
qqPlot(spermfill)
shapiro.test(spermfill)

# Data are not normally distributed and need to be transformed 
library(rcompanion)
plotNormalHistogram(spermfill)

#Tukey transformation
T_tuk = transformTukey(spermfill, plotit = FALSE)# lambda=1.2 to use for transformation
plotNormalHistogram(T_tuk)# looks better normality distr

#---------------------treatment abnd dose --model selection--------------##
y<-(spermfill^1.925)
mod <- glm(y~treatment*females*duration*latency, family = "gaussian")

library(MuMIn)
AICc(mod)#mod3 is the best model with the AIC=119.7579, the 2nd = mod6
summary(mod)

library(car)
Anova(mod)  # Stats show that the interaction treatment*duration has no significant difference, only female+duration has p<0.05

#----multiple comparison------------##
library(emmeans)
summary(emmeans(mod, pairwise~ treatment), type="response")
summary(emmeans(mod, pairwise~ females), type="response")
summary(emmeans(mod, pairwise~ duration), type="response")
summary(emmeans(mod, pairwise~ latency), type="response")
summary(emmeans(mod, pairwise~ females*duration), type="response") 

#Plot of the spermatheca fill according to the males status (figure 6C)
fig6C.tiff <- ggplot(data,aes(x=treatment ,y=spermfill, fill = treatment)) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 15, col = "black", fill = "white") +
  theme_bw() + labs( x = "Males' infection status", y = "Spermatheca filling score") + 
  geom_jitter(width=0.1,alpha=0.3)+ ylim(0, 1) + scale_fill_manual(values=c("skyblue", "coral1")) + 
  theme(axis.text.x = element_text(color="black", size=12, face = "bold", angle=0),
        axis.text.y = element_text(color="black", size=12, face = "bold", angle=0),
        axis.title = element_text(color="black", size=12, face = "bold", angle=0),
        strip.background =element_rect(fill="gray"),
        strip.text = element_text(size = 12, colour="black"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
  theme(legend.position = "no")
fig6C.tiff
tiff("fig6C.tiff", width = 3, height = 4, units = 'in', res = 300)
plot(fig6C.tiff + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Males' infection status"))) + ylab( expression (paste (bold("Sermatheca filling score"), )))
dev.off()
